import scrapy

'''
Test notes:
    scrapy crawl zzz
Target site:
    https://01bzw.pw/
    https://01bzw.pw/html/85/85504/6004214_8.html
'''


class ZzzSpider(scrapy.Spider):
    """Crawl a serialized novel from 01bzw.pw and append its text to a local file.

    Starts at ``urlaa`` and keeps following the next-page link (anchor text
    '下一页') until it disappears, POSTing the site's page password with
    every request. Each page's paragraphs are appended to ``<nameaa>.txt``.
    """

    # URL scheme prefix (not currently used to build urlaa — kept for reference)
    prefix = 'https://'
    # site name (middle part of the domain)
    website = '01bzw'
    # domain suffix
    suffix = '.pw/'
    name = 'zzz'
    allowed_domains = [website + '.pw']

    # first chapter page of the novel to crawl
    urlaa = 'https://01bzw.pw/html/38/38742/3648391.html'
    # output file base name (the novel's title)
    nameaa = '蒋文涓的职场生活'

    def start_requests(self):
        # The site gates chapter pages behind a page password; send it as form data.
        yield scrapy.FormRequest(url=self.urlaa, formdata={'pagepwd': '1234'}, callback=self.parse)

    def parse(self, response):
        """Append this page's non-empty paragraphs to the output file, then follow the next-page link."""
        paragraphs = response.xpath("//div[@class='content']/p/text()").getall()
        lines = [text.strip() for text in paragraphs if text.strip()]
        if lines:
            # Open the file once per page instead of once per paragraph (was a
            # re-open inside the loop for every single line).
            with open(self.nameaa + '.txt', 'a', encoding='utf-8') as f:
                f.write('\n'.join(lines) + '\n')
        # Unified on the modern .get() API (extract_first() is the legacy alias).
        url = response.xpath("//div[@class='reader-main']//a[@id='next_url']/@href").get()
        name = response.xpath("//div[@class='reader-main']//a[@id='next_url']/text()").get()
        if name and url and name.strip() == '下一页':
            next_page_url = response.urljoin(url)
            yield scrapy.FormRequest(url=next_page_url, formdata={'pagepwd': '1234'}, callback=self.parse)
